1 /** 2 * SIMD-accelerated Set 3 * Copyright: © 2015 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost License 1.0) 6 */ 7 module containers.simdset; 8 9 private import std.experimental.allocator.mallocator : Mallocator; 10 11 /** 12 * Set implementation that is well suited for small sets and simple items. 13 * 14 * Uses SSE instructions to compare multiple elements simultaneously, but has 15 * linear time complexity. 16 * 17 * Note: Only works on x86_64. Does NOT add GC ranges. Do not store pointers in 18 * this container unless they are also stored somewhere else. 19 * 20 * Params: 21 * T = the element type 22 * Allocator = the allocator to use. Defaults to `Mallocator`. 23 */ 24 version (D_InlineAsm_X86_64) struct SimdSet(T, Allocator = Mallocator) 25 if (T.sizeof == 1 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8) 26 { 27 this(this) @disable; 28 29 private import std.experimental.allocator.common : stateSize; 30 31 static if (stateSize!Allocator != 0) 32 { 33 /// No default construction if an allocator must be provided. 34 this() @disable; 35 36 /** 37 * Use the given `allocator` for allocations. 38 */ 39 this(Allocator allocator) 40 in 41 { 42 assert(allocator !is null, "Allocator must not be null"); 43 } 44 do 45 { 46 this.allocator = allocator; 47 } 48 } 49 50 ~this() 51 { 52 scope (failure) assert(false, "SimdSet destructor threw an exception"); 53 clear(); 54 } 55 56 void clear() 57 { 58 allocator.deallocate(cast(void[]) storage); 59 _length = 0; 60 storage = []; 61 } 62 63 /** 64 * Params: 65 * item = the item to check 66 * Returns: 67 * true if the set contains the given item 68 */ 69 bool contains(T item) const pure nothrow @nogc @trusted 70 { 71 if (_length == 0) 72 return false; 73 bool retVal; 74 immutable remainder = _length % (16 / T.sizeof); 75 ushort mask = remainder == 0 ? 0xffff : (1 << (remainder * T.sizeof)) - 1; 76 //ushort resultMask; 77 ulong ptrStart = cast(ulong) storage.ptr; 78 ulong ptrEnd = ptrStart + storage.length * T.sizeof; 79 static if (T.sizeof == 1) 80 ulong needle = (cast(ubyte) item) * 0x01010101_01010101; 81 else static if (T.sizeof == 2) 82 ulong needle = (cast(ushort) item) * 0x00010001_00010001; 83 else static if (T.sizeof == 4) 84 ulong needle = (cast(ulong) item) * 0x00000001_00000001; 85 else static if (T.sizeof == 8) 86 ulong needle = cast(ulong) item; 87 else 88 static assert(false); 89 mixin(asmSearch()); 90 end: 91 return retVal; 92 } 93 94 /// ditto 95 bool opBinaryRight(string op)(T item) const pure nothrow @nogc @safe if (op == "in") 96 { 97 return contains(item); 98 } 99 100 /** 101 * Inserts the given item into the set. 102 * 103 * Params: 104 * item = the item to insert 105 * Returns: 106 * true if the item was inserted or false if it was already present 107 */ 108 bool insert(T item) 109 { 110 if (contains(item)) 111 return false; 112 if (storage.length > _length) 113 storage[_length] = item; 114 else 115 { 116 immutable size_t cl = (storage.length * T.sizeof); 117 immutable size_t nl = cl + 16; 118 void[] a = cast(void[]) storage; 119 allocator.reallocate(a, nl); 120 storage = cast(typeof(storage)) a; 121 storage[_length] = item; 122 } 123 _length++; 124 return true; 125 } 126 127 /// ditto 128 bool opOpAssign(string op)(T item) if (op == "~") 129 { 130 return insert(item); 131 } 132 133 /// ditto 134 alias insertAnywhere = insert; 135 136 /// ditto 137 alias put = insert; 138 139 /** 140 * Removes the given item from the set. 141 * 142 * Params: 143 * item = the time to remove 144 * Returns: 145 * true if the item was removed, false if it was not present 146 */ 147 bool remove(T item) 148 { 149 import std.algorithm : countUntil; 150 151 // TODO: Make this more efficient 152 153 ptrdiff_t begin = countUntil(storage, item); 154 if (begin == -1) 155 return false; 156 foreach (i; begin .. _length - 1) 157 storage[i] = storage[i + 1]; 158 _length--; 159 return true; 160 } 161 162 /** 163 * Slice operator 164 */ 165 auto opSlice(this This)() 166 { 167 import containers.internal.element_type : ContainerElementType; 168 return cast(ContainerElementType!(This, T)[]) storage[0 .. _length]; 169 } 170 171 /** 172 * Returns: 173 * the number of items in the set 174 */ 175 size_t length() const pure nothrow @nogc @property 176 { 177 return _length; 178 } 179 180 invariant 181 { 182 assert((storage.length * T.sizeof) % 16 == 0, "Bad storage length"); 183 } 184 185 private: 186 187 import containers.internal.storage_type : ContainerStorageType; 188 private import containers.internal.mixins : AllocatorState; 189 190 static string asmSearch() 191 { 192 import std.string : format; 193 194 static if (T.sizeof == 1) 195 enum instruction = `pcmpeqb`; 196 else static if (T.sizeof == 2) 197 enum instruction = `pcmpeqw`; 198 else static if (T.sizeof == 4) 199 enum instruction = `pcmpeqd`; 200 else static if (T.sizeof == 8) 201 enum instruction = `pcmpeqq`; 202 else 203 static assert(false); 204 205 static if (__VERSION__ >= 2067) 206 string s = `asm pure nothrow @nogc`; 207 else 208 string s = `asm`; 209 210 return s ~ ` 211 { 212 mov R8, ptrStart; 213 mov R9, ptrEnd; 214 sub R8, 16; 215 sub R9, 16; 216 movq XMM0, needle; 217 shufpd XMM0, XMM0, 0; 218 loop: 219 add R8, 16; 220 movdqu XMM1, [R8]; 221 %s XMM1, XMM0; 222 pmovmskb RAX, XMM1; 223 //mov resultMask, AX; 224 mov BX, AX; 225 and BX, mask; 226 cmp R8, R9; 227 cmove AX, BX; 228 popcnt AX, AX; 229 test AX, AX; 230 jnz found; 231 cmp R8, R9; 232 jl loop; 233 mov retVal, 0; 234 jmp end; 235 found: 236 mov retVal, 1; 237 jmp end; 238 }`.format(instruction); 239 } 240 241 mixin AllocatorState!Allocator; 242 ContainerStorageType!(T)[] storage; 243 size_t _length; 244 } 245 246 /// 247 version (D_InlineAsm_X86_64) version(emsi_containers_unittest) unittest 248 { 249 import std.string : format; 250 251 void testSimdSet(T)() 252 { 253 SimdSet!T set; 254 assert(set.insert(1)); 255 assert(set.length == 1); 256 assert(set.contains(1)); 257 assert(!set.insert(1)); 258 set.insert(0); 259 set.insert(20); 260 assert(set.contains(1)); 261 assert(set.contains(0)); 262 assert(!set.contains(10)); 263 assert(!set.contains(50)); 264 assert(set.contains(20)); 265 foreach (T i; 28 .. 127) 266 set.insert(i); 267 foreach (T i; 28 .. 127) 268 assert(set.contains(i), "%d".format(i)); 269 foreach (T i; 28 .. 127) 270 assert(set.remove(i)); 271 assert(set.length == 3, "%d".format(set.length)); 272 assert(set.contains(0)); 273 assert(set.contains(1)); 274 assert(set.contains(20)); 275 assert(!set.contains(28)); 276 } 277 278 testSimdSet!ubyte(); 279 testSimdSet!ushort(); 280 testSimdSet!uint(); 281 testSimdSet!ulong(); 282 testSimdSet!byte(); 283 testSimdSet!short(); 284 testSimdSet!int(); 285 testSimdSet!long(); 286 } 287 288 version (D_InlineAsm_X86_64) struct SimdSet(T) if (!(T.sizeof == 1 289 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8)) 290 { 291 import std.string : format; 292 static assert (false, ("Cannot instantiate SimdSet of type %s because its size " 293 ~ "(%d) does not fit evenly into XMM registers.").format(T.stringof, T.sizeof)); 294 }